package com.dxf.bigdata.D02_demo1

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Minimal Spark word-count example: reads text files, counts how often each
 * space-separated token occurs, and prints the resulting `(word, count)` pairs.
 */
object S01_WordCount {

  /**
   * Counts the occurrences of every token in the files matching `datas/*`
   * and prints one `(word, count)` pair per line to standard output.
   *
   * The context is only used here; it is not stopped by this method.
   *
   * @param sc the SparkContext used to read the input and run the job
   */
  def doSome(sc: SparkContext): Unit = {
    // 1. Read every file matching the path pattern, line by line.
    val lines = sc.textFile("datas/*")

    // 2. Split each line on a single space character.
    //    NOTE: consecutive spaces produce empty-string tokens, which are
    //    counted like any other word.
    val words = lines.flatMap(_.split(" "))

    // 3. Pair every word with 1 and sum the ones per word:
    //    (hello,1),(hello,1),(word,1) => (hello,2),(word,1)
    //    reduceByKey pre-aggregates inside each partition before the shuffle,
    //    so only partial counts travel over the network; grouping first and
    //    taking the group size would ship every single occurrence instead.
    val wordToCount = words
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // 4. Bring the (small) result set to the driver and print it.
    val array: Array[(String, Int)] = wordToCount.collect()

    array.foreach(println)
  }

  /**
   * Entry point: creates a SparkContext with master `local`, runs the word
   * count, and always stops the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Establish the connection to Spark.
    val sparkConf = new SparkConf().setMaster("local").setAppName("wordCount")
    val sc = new SparkContext(sparkConf)

    try {
      // 2. Run the business logic.
      doSome(sc)
    } finally {
      // 3. Stop the context even when the job fails, so it is not left running.
      sc.stop()
    }
  }
}
